/*NOTE: This is model code to illustrate the logic used to generate the datasets for analysis in the SCARP pilot.
This code is not intended to run as is - due to Epic restrictions on the sharing of intellectual property all 
of the table and attribute names have been "masked" with names enclosed in square brackets if they were pulled 
from Clarity (EPIC).*/

/*****************************************************
PROJECT:  BRIDGE C2 
PILOT: SCRAP
Date: 

DATE: 8/xx/2022
9/6/2022 corrected line 414
  from if screen_date<'29FEB2020'd then delete;
	to if screen_date>'29FEB2020'd then delete;
11/8/2022: original code did not include code for output table
code revised 
 

DESCRIPTION: Identiify patients in CVC cohort with abnormal pap results 
for power analysis on PAP followup

*****************************************************/;
options compress=yes reuse=yes;
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";



/*--------------------------------------------------------------------------------------------*/
/*--Pap proc codes supplemented with non-surveillance codes*/
/*--------------------------------------------------------------------------------------------*/

/*internal look up list of PAP screens: scrap.PAP_PX_LU_for_ADVANCE*/;

proc sql;
create table rd.PAP_PX_LU_for_ADVANCE as select * from scrap.PAP_PX_LU_for_ADVANCE;

proc print data=rd.PAP_PX_LU_for_ADVANCE;
run;*contains HPV codes;

proc sql; create table rd.jpo_CVC_cohort as select * from scrap.scrap_cvc_patients;
run;


proc sql;
connect to odbc as mycon
(required = "dsn=OCHIN_HCN_FENWAY;Trusted_Connection=yes");
create table ADV_paps as
select * from connection to mycon (
select d.ADVANCE_patid,
d.raw_patid,
d.index_visit,
d.observation_end,
o.lab_result_cm_id as result_id,
o.lab_px,
o.lab_loinc,
lu.[procedure name],
o.abn_ind_description,
o.sas_result_date
from OCHIN.dbo.lab_result_cm o
inner join research_dev.dbo.PAP_PX_LU_for_ADVANCE lu
on lu.[procedure code]=o.lab_px
inner join research_dev.dbo.jpo_CVC_cohort d
on d.raw_patid=o.raw_patid
where o.result_date < '02-29-2020');


data paps1;*all pap tests ordered;
set ADV_paps;
length source $20;
format pap_date date9.;
source='ADVANCE';
where  ABN_IND_DESCRIPTION in('No Information','Abnormal','Abnormally high', 
'Abnormally low');
abnormal=ABN_IND_DESCRIPTION in('Abnormal','Abnormally high', 
'Abnormally low');
pap_date=sas_result_date;
if pap_date<'01JAN2011'd then delete;
if pap_date>'29FEB2020'd then delete;
run;

*screening orders and results from order_proc;
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table CVC_orders as 
select * from connection to mycon (
SELECT distinct
	den.raw_patid
	,op.[ORDER PROCEDURE IDENTIFIER]
    ,op.[ORDERING DATE]
	,op.[result time]
	,op.[procedure identifier]
	,[order status]
	,ppc.screen
	,ppc.[procedure name]
	,eap.[procedure code]
	,op.[pending order yes/no]
	,op.[ORDER PROCEDURE IDENTIFIER]
	,op.[abnormal yes/no]
	,op.[display name]
	,op.[ORDERING DATE]
	,op.[result time] 	
FROM
    research_dev.dbo.jpo_CVC_cohort  AS den
		inner join Clarity.dbo.[ORDER PROCEDURE] as op
			on den.raw_patid = op.[patient identifier] 
        inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
      		ON op.[procedure identifier] = ppc.[procedure identifier]
	  	left join clarity.dbo.[procedures records EAP] eap
			on ppc.[procedure identifier]=eap.[procedure identifier]
		where ppc.screen='PAP');
quit;
*3=resulted 5=completed;
*2=sent ;
* 4 canceled ;

data paps2;
set CVC_orders;
format pap_date date9.;
length source $20;
source='Resulted Order Proc';
pap_date=datepart([result time]);
where [result time]^=.  and [order status]^=4;
abnormal=[abnormal yes/no]='Y';
if pap_date<'01JUL2011'd then delete;
if pap_date>'29FEB2020'd then delete;

run;

proc sql;
connect to odbc as mycon (required='dsn=research_dev;Trusted_Connection=yes');                                                             
create table clarity_LAB_components as select *
from connection to mycon (
select distinct pt.raw_patid,  
	r.[COMPONENT IDENTIFIER], 	
	zr.[result status], 
	zo.[order status],
	r.[ORDER PROCEDURE IDENTIFIER], 
	e.[procedure code], 
	e.[procedure name], 
	o.[ORDERING DATE], 
	r.[result time], 
	r.[order value], 
	r.[order number value], 
	r.[result flag ], 
	zc.[result flag name],
	o.[patient encounter contact serial number identifier], 
	cc.[lo inc code] ,
	cc.[default lnc identifier] ,
	cc.[external name],
	ppc.screen,
	ppc.[procedure identifier]
from CLARITY.dbo.[ORDER RESULTS] r
inner join CLARITY.dbo.[ORDER PROCEDURE] o on o.[ORDER PROCEDURE IDENTIFIER]=r.[ORDER PROCEDURE IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_cohort  pt on pt.raw_patid=o.[patient identifier]
left join CLARITY.dbo.[LAB STATUS] zr on zr.[lab status]=r.[lab status]
left join CLARITY.dbo.[ORDER STATUS] zo on zo.[order status]=o.[order status] 
left join clarity.dbo.[result flag] zc on r.[result flag ] = zc.[result flag ]
left join CLARITY.dbo.[procedures records EAP] e on e.[procedure identifier]=o.[procedure identifier]
left join clarity.dbo.[component] cc on cc.[COMPONENT IDENTIFIER]=r.[COMPONENT IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
on ppc.[COMPONENT IDENTIFIER]=r.[COMPONENT IDENTIFIER]
where ppc.[COMPONENT IDENTIFIER] is not null);

data paps3;
set  clarity_LAB_components; 
where [result flag name] in('Normal','Abnormal') and [result time]^=.;
format pap_date date9.;
length source $20;
source='LAB by compnent id';
pap_date=datepart([result time]);
if pap_date<'01JUL2011'd then delete;
if pap_date>'29FEB2020'd then delete;
abnormal=[result flag name] ='Abnormal';
run;

/*linked though LOINC code*/
proc sql;
connect to odbc as mycon (required='dsn=research_dev;Trusted_Connection=yes');                                                             
create table clarity_LAB_LNC_cc as select *
from connection to mycon (
select distinct pt.raw_patid,  
	r.[COMPONENT IDENTIFIER], 	
	zr.[result status], 
	zo.[order status],
	r.[ORDER PROCEDURE IDENTIFIER], 
	e.[procedure code], 
	e.[procedure name], 
	o.[ORDERING DATE], 
	r.[result time], 
	r.[order value], 
	r.[order number value], 
	r.[result flag ], 
	zc.[result flag name],
	o.[patient encounter contact serial number identifier],
	cc.[lo inc code], 
	cc.[default lnc identifier] ,
	cc.[external name],
	r.[component lnc identifier]
	ppc.[procedure identifier],
	ppc.screen
from CLARITY.dbo.[ORDER RESULTS] r
inner join CLARITY.dbo.[ORDER PROCEDURE] o on o.[ORDER PROCEDURE IDENTIFIER]=r.[ORDER PROCEDURE IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_cohort  pt on pt.raw_patid=o.[patient identifier]
left join CLARITY.dbo.[LAB STATUS] zr on zr.[lab status]=r.[lab status]
left join CLARITY.dbo.[ORDER STATUS] zo on zo.[order status]=o.[order status] 
left join clarity.dbo.[result flag] zc on r.[result flag ] = zc.[result flag ]
left join CLARITY.dbo.[procedures records EAP] e on e.[procedure identifier]=o.[procedure identifier]
left join clarity.dbo.[component] cc on cc.[COMPONENT IDENTIFIER]=r.[COMPONENT IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
on ppc.LOinc_code=cc.[default lnc identifier]  
where ppc.loinc_code is not null and ppc.screen='PAP');

data paps4;
set  clarity_LAB_LNC_cc; 
where [result flag name] in('Abnormal','Normal');
format pap_date date9.;
length source $20;
source='LAB by LoinC';
pap_date=datepart([result time]);
if pap_date<'01JUL2011'd then delete;
if pap_date>'29FEB2020'd then delete;
abnormal=[result flag name] ='Abnormal';
run;

proc freq data=paps4;
tables abnormal source /missing;
run;

data paps;
set paps1 paps2 paps3 paps4;
loinc=lab_loinc;
if loinc=' ' then loinc=loinc_code;
run;



/*ask matthew */

data papsB;
set paps;
if lab_px in('87624','87625','LP2299','LP2854','LV1811',
'LP2507','LS161','LX042') or 
[procedure code] in('87624','87625','LP2299','LP2854','LV1811',
'LP2507','LS161','LX042','LS508','LV4924','LV5741','LS153','83036',
'LT465','LV2564','LV357','LV5973',
'LV3432','LS844','LT397','LP1394','LT706','LP406','82044','LT475',
'LS774','81025','81000','81002','LP040') or 
[procedure name] in('HISTORICAL PAP SMEAR','RFLX - HUMAN PAPILLOMA VIRUS HPV')
then do;
 	not_pap=1; flag_[procedure code]=1;
end;
if [procedure identifier] in( 140218 , 146070 , 140262 , 138721 , 181841 ,
 155165 , 144750 , 133117 , 64005 , 153958 , 75689 , 191068 ,
 109288 , 37665 , 37425 ) then do;
 	not_pap=1; flag_[procedure identifier]=1;
end;
if [COMPONENT IDENTIFIER] in ( 15526 , 74733 , 25794 , 18951 , 27336 ,
 32060 , 13810 , 83661 , 85982 , 29797 , 41963 , 43779 ,
 44135 , 45388 , 82881 , 82882 , 85983 , 41964 , 43780 ,
 44136 , 45389 , 44200079 , 2000002 , 58093 , 1430575 ,
 1375 , 4610 , 23176 , 44008 , 64106 , 67587 , 72811 ,
 82361 , 92728 , 2833 , 2278 , 13087 , 160903 , 40820 ,
 40821 , 13337 , 13338 , 7430 , 10899 , 112399 , 43500009 ,
 11711 , 31656 , 33232 , 33235 , 40954 , 43433 , 45563 ,
 64266 , 66744 , 70247 , 72402 , 80353 , 86152 , 41664 ,
 41668 , 41670 , 41760 , 61842 , 70250 , 68334 , 63608 ,
 1059 , 4695 , 46731 , 21041 , 65766 , 31156 , 45094 ,
 31157 ) then do;
	not_pap=1; flag_component=1;
end;
run;
/*add check of loinc codes here*/

proc freq data=papsB;
where flag_[procedure code]=1;
tables source lab_px*[procedure code]*[procedure name]/missing list;
run;
/*81000	URINLS DIP STICK/TABLET REAGNT NON-AUTO MICRSCPY*/
/*81002	URNLS DIP STICK/TABLET RGNT NON-AUTO W/O MICRSCP*/
/*81025	URINE PREGNANCY TEST VISUAL COLOR CMPRSN METHS*/
/*82044	URINE ALBUMIN SEMIQUANTITATIVE*/
/*87624	IADNA HUMAN PAPILLOMAVIRUS HIGH-RISK TYPES*/
/*87625	IADNA HUMAN PAPILLOMAVIRUS TYPES 16 & 18 ONLY*/
/*LP040	VITAMIN B12 & FOLATE*/
/*LP1394	SIBO: GLUCOSE TEST (POCT)*/
/*LP2299	HPV HR W/HV 16 & 18, RECTAL THINPREP VIAL*/
/*LP2507	OB PANEL (M-CAL)(MTY)*/
/*LP2854	CT/NG�AND�TRICH�RNA, QUALITATIVE, TMA, PAP VIAL*/
/*LS153	HEMOGLOBIN FINGERSTICK (85018)*/
/*LS161	CYTOLOGY NON-GYN, FLUID WASH/BRUSHINGS*/
/*LS508	CBC WITH AUTO DIFF*/
/*LT465	HPV DNA HIGH RISK*/
/*LT475	URINE CYTOLOGY*/
/*LT706	UA PRENATAL SHORT DIPSTICK*/
/*LV1811	HPV SCREENING (PAP VIAL)*/
/*LV2564	HPV DNA HIGH RISK (Q)*/
/*LV3432	HPV RNA E6/E7, SUREPATH VIAL*/
/*LV357	HPV DNA PROBE HIGH RISK*/
/*LV4924	DRUG SCREEN QUICKTOX (POCT)*/
/*LV5973 HPV GENOTYPING*/
/*LV5741	DRUG SCREEN QUICKTOX, BRANAN (POCT)*/
/*LX042	HUMAN PAPILLOMA VIRUS,HIGH AND LOW RISK*/

proc freq data=papsB;
where flag_[procedure identifier]=1 and flag_[procedure code]=.;
tables [procedure code]*[procedure identifier]*[procedure name]/missing list;
run;

/*[procedure code]	[procedure identifier]	[procedure name]*/
/*90649	37425	4VHPV VACCINE 3 DOSE SCHEDULE FOR IM USE*/
/*LP2347	140262	HPV GENO 16/18 + 45*/
/*LP2445	146070	HPV GENO 16, 18 + 45 (SUREPATH VIAL)*/
/*LR839	75689	RFLX - HUMAN PAPILLOMA VIRUS HPV*/
/*LS657	64005	NEISSERIA GONORRHOEAE DNA SDA PAP VIAL*/
/*LV1713	109288	TRICHOMONAS VAGINALIS,QL, TMA, PAP VIAL*/
/*LV2850	140218	HISTORICAL PAP SMEAR*/
/*LV3874	155165	HPV MRNA E6/E7, SUREPATH VIAL REFLEX HPV 16,18/45*/
/*LV4580	181841	HPV MRNA E6/E7, SUREPATH VIAL*/
/*LV5509	191068	TRICHOMONAS RRNA AMPLIFIED, TMA - PAP VIAL*/

proc freq data=papsB;
where flag_component=1;
tables [COMPONENT IDENTIFIER]*[external name]*[display name]/missing list;
run;

/*[COMPONENT IDENTIFIER]	[external name]*/
/*1059	HPV, HIGH-RISK*/
/*1375	HPV DNA (HIGH RISK)*/
/*2278	HPV DNA PROBE, HIGH RISK*/
/*2833	HPV DNA HIGH RISK*/
/*4610	HPV DNA (HIGH RISK)*/
/*4695	HPV, HIGH-RISK*/
/*7430	HPV HIGH RISK*/
/*10899	HPV HIGH RISK PROBE*/
/*11711	HPV INTERMEDIATE/HIGH RISK*/
/*13087	HPV DNA, HIGH RISK*/
/*13337	HPV GENOTYPE, 16*/
/*13338	HPV GENOTYPE, 18*/
/*13810	HPV (HIGH RISK)*/
/*15526	HIGH RISK*/
/*18951	HIGH RISK HPV, CERVIX*/
/*21041	HPV, LOW VOLUME RFX*/
/*23176	HPV DNA (HIGH RISK)*/
/*25794	HIGH RISK HPV DNA*/
/*27336	HIGH-RISK HPV*/
/*29797	HPV 16 GENOTYPE*/
/*31156	HPV16 DNA, INVADER(R)*/
/*31157	HPV18 DNA, INVADER(R)*/
/*31656	HPV MRNA E6/E7*/
/*32060	HPV*/
/*33232	HPV MRNA E6/E7*/
/*33235	HPV MRNA E6/E7*/
/*40820	HPV GENOTYPE 16*/
/*40821	HPV GENOTYPE 18, 45*/
/*40954	HPV MRNA E6/E7*/
/*41664	HPV MRNA E6/E7, SUREPATH VIAL*/
/*41668	HPV MRNA E6/E7, SUREPATH VIAL*/
/*41670	HPV MRNA E6/E7, SUREPATH VIAL*/
/*41760	HPV MRNA E6/E7, SUREPATH VIAL*/
/*41963	HPV 16 RNA*/
/*41964	HPV 18/45 RNA*/
/*43433	HPV MRNA E6/E7*/
/*43779	HPV 16 RNA*/
/*43780	HPV 18/45 RNA*/
/*44008	HPV DNA (HIGH RISK)*/
/*44135	HPV 16 RNA*/
/*44136	HPV 18/45 RNA*/
/*45094	HPV16 DNA, INVADER(R)*/
/*45388	HPV 16 RNA*/
/*45389	HPV 18/45 RNA*/
/*45563	HPV MRNA E6/E7*/
/*46731	HPV, HIGH-RISK*/
/*58093	HPV APTIMA*/
/*61842	HPV MRNA E6/E7, VAGINAL*/
/*63608	HPV mRNA E6/E7*/
/*64106	HPV DNA (HIGH RISK)*/
/*64266	HPV MRNA E6/E7*/
/*65766	HPV, high-risk*/
/*66744	HPV MRNA E6/E7*/
/*67587	HPV DNA (HIGH RISK)*/
/*68334	HPV OTHER HR TYPES*/
/*70247	HPV MRNA E6/E7*/
/*70250	HPV MRNA E6/E7, VAGINAL*/
/*72402	HPV MRNA E6/E7*/
/*72811	HPV DNA (HIGH RISK)*/
/*74733	HIGH RISK HPV*/
/*80353	HPV MRNA E6/E7*/
/*82361	HPV DNA (HIGH RISK)*/
/*82881	HPV 16 RNA*/
/*82882	HPV 18/45*/
/*83661	HPV (HUMAN PAPILLOMA)*/
/*85982	HPV 16*/
/*85983	HPV 18/45*/
/*86152	HPV MRNA E6/E7*/
/*92728	HPV DNA (HIGH RISK)*/
/*112399	HPV HIGH RISK, PROBE OFF PAP*/
/*160903	HPV DNA, HIGH RISK*/
/*1430575	HPV APTIMA*/
/*2000002	HPV 18/45 RNA,ANAL RECTAL*/
/*43500009	HPV HIGH RISK, PROBE OFF PAP*/
/*44200079	HPV 18/45 RNA*/

proc freq data=papsb;
where not_pap=.;
tables [procedure code]*[procedure identifier]*[procedure name]/missing list;
run;

proc freq data=papsb;
where not_pap=.;
tables [COMPONENT IDENTIFIER]*[external name]/missing list;
run;
proc freq data=papsb;
tables not_pap*abnormal/missing;
run;*18% of abnormal results flags are flagged as not PAP*/;

*separate records with LoinC codes to link to LOINC name;
data prob_pap1 prob_papL;
set papsb;
where not_pap=.;
if loinc=' ' then output prob_pap1;
if loinc^=' ' then output prob_papL;
run;

proc sql;
create table loinc as
select distinct p.LOINC,
cc.[external name],
count(distinct p.[COMPONENT IDENTIFIER]) as components,
count(p.loinc) as records
from prob_papL p left join clarity.clarity_component cc
on p.loinc= cc.[default lnc identifier] 
group by p.loinc, cc.[external name];

/*data exported to excel, classified (ambiguous/blak results looked up on google)
imposted to WORK.LOINC_category*/
/*data scrap.loinc_category;*/
/*set loinc_category;*/
/*if strip(loinc)='18906-8' then do;*/
/*	loinc_type='Other';*/
/*	exclude_result=1;*/
/*end;*/
/*if strip(loinc)='22638-1' then do;*/
/*	loinc_type='Pathology Report';*/
/*	exclude_result=0;*/
/*end;*/
/*if strip(loinc)='48640-7' then do;*/
/*	loinc_type='Other';*/
/*	exclude_result=1;*/
/*end;*/
/*if strip(loinc)='50388-8' then do;*/
/*	loinc_type='Gonorrhea';*/
/*	exclude_result=1;*/
/*end;*/
/*if strip(loinc)='52797-8' then do;*/
/*	loinc_type='Diagnosis';*/
/*	exclude_result=0;*/
/*end;*/
/*run;*/
proc freq data=scrap.loinc_category;
tables exclude_result;
run;*no missing;

proc sql;
create table papsLb as
select p.*,
L.Loinc_type,
L.exclude_result
from prob_papL p left join 
scrap.Loinc_category L
on p.loinc=L.loinc;
data papsLb;
set papsLb;
if loinc='18906-8' then do;
	loinc_type='Other';
	exclude_result=1;
end;
if loinc='22638-1' then do;
	loinc_type='Pathology Report';
	exclude_result=0;
end;
if loinc='48640-7' then do;
	loinc_type='Other';
	exclude_result=1;
end;
if loinc='50388-8' then do;
	loinc_type='Gonorrhea';
	exclude_result=1;
end;
if loinc='52797-8' then do;
	loinc_type='Diagnosis';
	exclude_result=0;
end;
run;
/*proc freq data=papsLb;*/
/*tables exclude_result*loinc_type*abnormal/missing nocol nopercent;*/
/*run;*/

data prob_pap1;
set prob_pap1;
exclude_result=0;
run;

data prob_paps_comb;
set prob_pap1 papsLb;
run;



/*/*deduplicate by source, date */*/;
proc sql;
create table pap_screen_orders as
select raw_patid,
pap_date format date9.,
source,
count(distinct result_id) as ADVANCE_results,
count(pap_date) as records,
sum(exclude_result) as nonpap_records,
sum(abnormal) as abnormal,
sum(abnormal*exclude_result) as exclude_abnormal
from prob_paps_comb
group by raw_patid, pap_date, source,[procedure code];

data pap_screen_orders;
set pap_screen_orders;
pap_records=records-nonPAP_records;
PAP_abnormal=abnormal-exclude_abnormal;
if pap_records=0 then mean_pap_abnormal=0;
if pap_records>0 then mean_PAP_abnormal=PAP_abnormal/PAP_records;
any_abnormal=abnormal>0;
any_abnormal_PAP=PAP_abnormal>0;
run;
proc freq data=pap_screen_orders;
tables any_abnormal*any_abnormal_PAP/nopercent nocol;
run;
*11% of PAP orders with an abnormal result are abnormal nonPAP only;

proc freq data=pap_screen_orders;
tables source pap_records--any_abnormal_pap;
run;
proc means data=pap_screen_orders;
class source;
var pap_records--any_abnormal_pap
;
run;

proc sql;
create table ADVANCE as 
select 1 as ADVANCE,
raw_patid,
pap_date,
records as ADV_records,
pap_records as ADV_PAP_records,
PAP_abnormal as ADV_PAP_abn,
mean_PAP_abnormal as ADV_mean_PAP_abn,
any_abnormal as ADV_any_abn,
any_abnormal_PAP as ADV_any_abn_PAP
from pap_screen_orders
where source='ADVANCE'
order by raw_patid, pap_date;

create table ORDER_PROC  as 
select 1 as ORDER_PROC,
raw_patid,
pap_date,
records as OP_records,
pap_records as OP_PAP_records,
PAP_abnormal as OP_PAP_abn,
mean_PAP_abnormal as OP_mean_PAP_abn,
any_abnormal as OP_any_abn,
any_abnormal_PAP as OP_any_abn_PAP
from pap_screen_orders
where source='Resulted Order Proc'
order by raw_patid, pap_date;

create table LOINC  as 
select raw_patid,
1 as ORD_Res_LOINC,
pap_date,
records as LNC_records,
pap_records as LNC_PAP_records,
PAP_abnormal as LNC_PAP_abn,
mean_PAP_abnormal as LNC_mean_PAP_abn,
any_abnormal as LNC_any_abn,
any_abnormal_PAP as LNC_any_abn_PAP
from pap_screen_orders
where source='LAB by LoinC'
order by raw_patid, pap_date;

create table COMPONENT  as 
select 1 as ORD_res_component,
raw_patid,
pap_date,
records as COMP_records,
pap_records as COMP_PAP_records,
PAP_abnormal as COMP_PAP_abn,
mean_PAP_abnormal as COMP_mean_PAP_abn,
any_abnormal as COMP_any_abn,
any_abnormal_PAP as COMP_any_abn_PAP
from pap_screen_orders
where source='LAB by compnent id'
order by raw_patid, pap_date;

quit;

data pap_screen_dates;
merge advance order_proc LOINC COMPONENT;
by raw_patid pap_date;
run;
data scrap.Clean_PAP_screen_dates_11092022;
set pap_screen_dates;
run;
data pap_screen_dates;
set scrap.Clean_PAP_screen_dates_11092022;
run;

proc freq data=pap_screen_dates;
tables ADV_any_abn_PAP*OP_any_abn_PAP*LNC_any_abn_PAP*COMP_any_abn_PAP/missing list;
run;
proc freq data=pap_screen_dates;
tables ADVANCE*Order_proc*Ord_res_Loinc*ord_res_Component/missing list;
run;

data abnormal_pap_dates (keep=raw_patid pap_date abnormal_pap);
set pap_screen_dates;
abnormal_pap=max(ADV_any_abn_PAP,OP_any_abn_PAP,LNC_any_abn_PAP,COMP_any_abn_PAP);
run;
data abnormal_pap_dates (keep=raw_patid pap_date abnormal_pap);
set pap_screen_dates;
abnormal_pap=max(ADV_any_abn_PAP,OP_any_abn_PAP,LNC_any_abn_PAP,COMP_any_abn_PAP);
run;


proc freq data=abnormal_pap_dates;
tables abnormal_pap;
run;

data scrap.temp_abnormal_pap_dates;
set abnormal_pap_dates;
run;
data abnormal_pap_dates;
set scrap.temp_abnormal_pap_dates;
run;

/*join to pat observation period, limiting to 5 years prior to index;*/
proc sql;
create table preindex as
select p.raw_patid,
p.pap_date,
d.index_visit,
p.abnormal_pap
from abnormal_pap_dates p
left join scrap.cvc_patient_covariates d
on p.raw_patid=d.raw_patid
where p.pap_date<d.index_visit
order by raw_patid, d.index_visit,p.pap_date;
/*last pap prior to index visit*/
data last_pap_pre;
set preindex;
by raw_patid;
if last.raw_patid then output;
run;
/*limit to paptients whose last pap before index was abnormal*/
data abnormal_last_pre;
set last_pap_pre;
where abnormal_pap=1;
run;


/*abnormal paps in study period*/
proc sql;
create table abnormal as
select p.raw_patid,
d.index_visit,
d.observation_end,
p.pap_date,
p.abnormal_pap
from abnormal_pap_dates p
left join scrap.cvc_patient_covariates d
on p.raw_patid=d.raw_patid
where p.pap_date between d.index_visit and
d.observation_end and abnormal_pap=1
order by raw_patid, p.pap_date;

/*limit to first abnormal*/
data first_abnormal;
set abnormal;
by raw_patid;
if first.raw_patid then output;
run;
proc sql;
create table cohort as
select c.*,
p.pap_date as last_pap_pre format date9.,
p.abnormal_pap as abnormal_pre,
s.pap_date as first_abnormal_study format date9.
from scrap.cvc_patient_covariates c left join 
abnormal_last_pre p on c.raw_patid=p.raw_patid
left join first_abnormal s on
s.raw_patid=c.raw_patid;

data cohort;
set cohort;
where abnormal_pre=1 or first_abnormal_study^=.;
run;

proc freq data=cohort;
tables sdh_food sdh_housing sdh_transportation;
run;

data cohort;
set cohort;
/*if there is an abnormal pap during study use that as index pap*/
if abnormal_pre=1 and first_abnormal_study=. then do;
	time_abnormal_pre_index=yrdif(last_pap_pre,index_visit,'age');
	time_observed_post_index=yrdif(index_visit,observation_end,'age');
end;
if first_abnormal_study^=. then do;
	time_abnormal_pre_index=0;
	time_to_abnormal=yrdif(index_visit, first_abnormal_study);
	time_post_abnormal=yrdif(first_abnormal_study,observation_end,'age');
end;
run;
proc freq data=cohort;
where abnormal_pre=1;
tables time_abnormal_pre_index;
run;


/***************************************************
last section of code creating file used for sample size numbers missing
****************************************************/
proc sql;
select count(distinct raw_patid) from scrap.abnormal_pap_cohort;

data scrap.abnormal_pap_cohort;
set scrap.abnormal_pap_cohort;
if abnormal_pre=1 then do;
	time_abnormal_pre_index=yrdif(last_pap_pre,index_visit,'age');
	time_post_index=yrdif(index_visit,observation_end,'age');
end;
run;
proc freq data=scrap.abnormal_pap_cohort;
tables abnormal_pre time_post_abnormal;
run;
proc freq data=scrap.abnormal_pap_cohort;
where last_pap_pre<index_visit and abnormal_pre=1;
tables time_abnormal_pre_index;
run;


/*drop patients whose last pap prior>5 years before index with no abnormal in study period*/
data cohort2;
set cohort;
if first_abnormal_study=. then do;
	if  time_abnormal_pre_index>5 then delete;
	/*flag for index visit within 3 months of abnormal*/
	observed_3month=time_abnormal_pre_index<0.25;
end;
observed_time_abn=max(time_observed_post_index,time_post_abnormal);
run;

data scrap.abnormal_pap_cohort_clean;
set cohort2;
run;
proc freq data=scrap.abnormal_pap_cohort_clean;
tables sdh_food SDH_housing SDH_transportation;
run;
proc format;
value pre_index 
	0 		='0'
	>0-0.1	='0.1'
	>0.1-0.5='0.5'
	>0.5-1	='1'
	>1-2	='2'
	>2-10	= 'GT 2';
value time_abn
	0-0.1		='0.1'
	>0.1-0.5	='0.5'
	>0.5-1		='1'
	>1-2		='2'
	>2-10			='GT 2';
value obs_time
	0			='0'
	>0-0.25		='0.25'
	>0.25-0.5	='0.5'
	>0.5-1		='1'
	>1-2		='2'
	>2-10			='GT 2';	
	
run;

proc sql;
select count(raw_patid) as total,
sum(sdh_food='1_Positive Screen') as food1,
sum(sdh_food='2_Negative Screen') as food2,
sum(sdh_food='3_Never Screened') as food3,
sum(sdh_housing='1_Positive Screen') as housing1,
sum(sdh_housing='2_Negative Screen') as housing2,
sum(sdh_housing='3_Never Screened') as housing3,
sum(sdh_transportation='1_Positive Screen') as transportation1,
sum(sdh_transportation='2_Negative Screen') as transportation2,
sum(sdh_transportation='3_Never Screened') as transportation3
from scrap.abnormal_pap_cohort_clean
where first_abnormal_study=.;
proc freq data=	scrap.abnormal_pap_cohort_clean;
where first_abnormal_study=.;
format time_abnormal_pre_index 	pre_index.;
table time_abnormal_pre_index;
run;
proc freq data=	scrap.abnormal_pap_cohort_clean;
where first_abnormal_study=.;
format time_abnormal_pre_index 	pre_index.;
table time_abnormal_pre_index*(sdh_food sdh_housing sdh_transportation);
run;

proc sql;
select count(raw_patid) as total,
sum(sdh_food='1_Positive Screen') as food1,
sum(sdh_food='2_Negative Screen') as food2,
sum(sdh_food='3_Never Screened') as food3,
sum(sdh_housing='1_Positive Screen') as housing1,
sum(sdh_housing='2_Negative Screen') as housing2,
sum(sdh_housing='3_Never Screened') as housing3,
sum(sdh_transportation='1_Positive Screen') as transportation1,
sum(sdh_transportation='2_Negative Screen') as transportation2,
sum(sdh_transportation='3_Never Screened') as transportation3
from scrap.abnormal_pap_cohort_clean
where first_abnormal_study^=.;

proc sql;
select sum(abnormal_pre) from scrap.abnormal_pap_cohort_clean
where first_abnormal_study^=.;


proc freq data=	scrap.abnormal_pap_cohort_clean;
where first_abnormal_study^=.;
format time_to_abnormal time_abn.;
table time_to_abnormal;
run;

proc freq data=	scrap.abnormal_pap_cohort_clean;
where first_abnormal_study^=.;
format time_to_abnormal time_abn.;
table time_to_abnormal*(sdh_food sdh_housing sdh_transportation);
run;


proc freq data=	scrap.abnormal_pap_cohort_clean;
format observed_time_abn obs_time.;
tables observed_time_abn;
run;
proc freq data=	scrap.abnormal_pap_cohort_clean;
format observed_time_abn obs_time.;
tables observed_time_abn*(sdh_food sdh_housing sdh_transportation)/norow nocol nopercent;
run;

/************************************************
SELECT Cohort: at least half of first 6 months post abnormal in 
observation period
************************************************/
data scrap.abnormal_pap_cohort_select;
set cohort2;
if first_abnormal_study=. then do;
	if time_abnormal_pre_index>0.25 then delete;
end;
if observed_time_abn<0.25 then delete;
run;

proc sql;
select count(raw_patid) as total,
sum(sdh_food='1_Positive Screen') as food1,
sum(sdh_food='2_Negative Screen') as food2,
sum(sdh_food='3_Never Screened') as food3,
sum(sdh_housing='1_Positive Screen') as housing1,
sum(sdh_housing='2_Negative Screen') as housing2,
sum(sdh_housing='3_Never Screened') as housing3,
sum(sdh_transportation='1_Positive Screen') as transportation1,
sum(sdh_transportation='2_Negative Screen') as transportation2,
sum(sdh_transportation='3_Never Screened') as transportation3
from scrap.abnormal_pap_cohort_select;

proc sql;
select count(raw_patid) as total,
sum(sdh_food='1_Positive Screen') as food1,
sum(sdh_food='2_Negative Screen') as food2,
sum(sdh_food='3_Never Screened') as food3,
sum(sdh_housing='1_Positive Screen') as housing1,
sum(sdh_housing='2_Negative Screen') as housing2,
sum(sdh_housing='3_Never Screened') as housing3,
sum(sdh_transportation='1_Positive Screen') as transportation1,
sum(sdh_transportation='2_Negative Screen') as transportation2,
sum(sdh_transportation='3_Never Screened') as transportation3
from scrap.abnormal_pap_cohort_select
where first_abnormal_study=.;
proc freq data=	scrap.abnormal_pap_cohort_select;
where first_abnormal_study^=.;
format time_abnormal_pre_index 	pre_index.;
table time_abnormal_pre_index;
run;
proc freq data=	scrap.abnormal_pap_cohort_select;
where first_abnormal_study=.;
format time_abnormal_pre_index 	pre_index.;
table time_abnormal_pre_index;
run;
proc freq data=	scrap.abnormal_pap_cohort_select;
where first_abnormal_study=.;
format time_abnormal_pre_index 	pre_index.;
table time_abnormal_pre_index*(sdh_food sdh_housing sdh_transportation)/norow nocol nopercent;
run;

proc sql;
select count(raw_patid) as total,
sum(sdh_food='1_Positive Screen') as food1,
sum(sdh_food='2_Negative Screen') as food2,
sum(sdh_food='3_Never Screened') as food3,
sum(sdh_housing='1_Positive Screen') as housing1,
sum(sdh_housing='2_Negative Screen') as housing2,
sum(sdh_housing='3_Never Screened') as housing3,
sum(sdh_transportation='1_Positive Screen') as transportation1,
sum(sdh_transportation='2_Negative Screen') as transportation2,
sum(sdh_transportation='3_Never Screened') as transportation3
from scrap.abnormal_pap_cohort_select
where first_abnormal_study^=.;

proc sql;
select sum(abnormal_pre) from scrap.abnormal_pap_cohort_select
where first_abnormal_study^=.;


proc freq data=	scrap.abnormal_pap_cohort_select;
where first_abnormal_study^=.;
format time_to_abnormal time_abn.;
table time_to_abnormal;
run;

proc freq data=	scrap.abnormal_pap_cohort_select;
where first_abnormal_study^=.;
format time_to_abnormal time_abn.;
table time_to_abnormal*(sdh_food sdh_housing sdh_transportation)/norow nocol nopercent;
run;


proc freq data=	scrap.abnormal_pap_cohort_select;
format observed_time_abn obs_time.;
tables observed_time_abn;
run;
proc freq data=	scrap.abnormal_pap_cohort_select;
format observed_time_abn obs_time.;
tables observed_time_abn*(sdh_food sdh_housing sdh_transportation)/norow nocol nopercent;
run;

*Check % of patients abnormal pap in study period 
with followup within 3 months;


